# coding:utf-8
from pyspark import SparkConf, SparkContext

if __name__ == '__main__':

    # Initialize the execution environment and build the SparkContext object
    conf = SparkConf().setMaster("local[*]").setAppName("BrowserCount")
    sc = SparkContext(conf=conf)
    # Read the source data
    file_rdd = sc.textFile("../csv文件/去哪儿各景区2023.1月销.csv")
    # Split each line and extract the fields we need
    split_rdd = file_rdd.map(lambda x: x.split(","))
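    # Column layout assumed here: index 2 = monthly sales, index 4 = province;
    # adjust the indices if the CSV differs.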
    # Cast the sales field to int so reduceByKey sums numbers instead of concatenating strings
    sold_rdd = split_rdd.map(lambda x: (x[4], int(x[2])))  # (key, value) = (province, monthly sales)

    # Aggregate monthly sales by province, then sort by the summed sales
    result_rdd = sold_rdd.reduceByKey(lambda a, b: a + b).sortBy(lambda x: x[1])
    result = result_rdd.take(30)
    # Persist the aggregated, sorted result to HDFS
    result_rdd.saveAsTextFile(
        "hdfs://master:9000/user/travel_analyse/result4")
    print(result)
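
    # Release the Spark resources once the job finishes
    sc.stop()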